add table of connections found per number of cells in multiplet

packages <- c("CIMseq", "CIMseq.data", "tidyverse")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)

##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
#rename classes
renameClasses <- function(class) {
  case_when(
    class == "0" ~ "LGR5+.Mki67high",
    class == "1" ~ "LGR5+.Mki67low",
    class == "2" ~ "Goblet",
    class == "3" ~ "Stem",
    class == "4" ~ "TA.late",
    class == "5" ~ "LGR5-.Mki67low",
    class == "6" ~ "Enterocyte",
    class == "7" ~ "TA.early",
    class == "8" ~ "Paneth",
    class == "9" ~ "Tufft",
    class == "10" ~ "Chromaffin",
    class == "11" ~ "Blood",
    TRUE ~ "error"
  )
}

getData(cObjSng, "classification") <- renameClasses(getData(cObjSng, "classification"))
fractions <- getData(sObj, "fractions")
colnames(fractions) <- renameClasses(colnames(fractions))
sObj@fractions <- fractions

Fig 1: Classes

p <- plotUnsupervisedClass(cObjSng, cObjMul)
p

# ggsave(
#   plot = p,
#   filename = 'figures/figure1.pdf',
#   device = cairo_pdf,
#   height = 240,
#   width = 240,
#   units = "mm"
# )

Fig 2: Cell type gene expression

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul,
  c("Lgr5", "Muc2", "Ptprc", "Chga", "Alpi", "Lyz1", "Dclk1"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
p

# ggsave(
#   plot = p,
#   filename = 'figures/figure2.pdf',
#   device = cairo_pdf,
#   height = 240,
#   width = 240,
#   units = "mm"
# )

Fig 3: Cell cycle

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul, c("Mki67"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
p

# ggsave(
#   plot = p,
#   filename = 'figures/figure3.pdf',
#   device = cairo_pdf,
#   height = 240,
#   width = 240,
#   units = "mm"
# )

Fig 4: Connections per multiplet

adj <- adjustFractions(cObjSng, cObjMul, sObj)
table(apply(adj, 1, sum))
## 
##   0   1   2   3   4   5   6 
##  17  93 196  90  32   6   1

Fig 5: Fraction histogram

tibble(fractions = c(fractions)) %>%
  ggplot() +
  geom_histogram(aes(fractions), binwidth = 0.01) +
  theme_bw()

Range of fractions picked after adjustment.

range(fractions[adj == 1])
## [1] 0.01429817 0.99994569

Fig 6: Detected cell types vs. cost

tibble(
  nCellTypes = apply(adj, 1, sum),
  cost = getData(sObj, "costs")
) %>%
  ggplot() +
  geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
  scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
  theme_bw()

Fig 7: Estimated cell numbers vs. cost

tibble(
  sample = rownames(getData(sObj, "fractions")),
  cost = unname(getData(sObj, "costs"))
) %>%
  inner_join(
    select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber), 
    by = "sample"
  ) %>%
  mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
  ) +
  theme_bw()

Fig 8: Estimated cell number vs. Detected cell number

ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections <- nConnections[match(ercc$sample, names(nConnections))]
tibble(
  detectedConnections = round(nConnections),
  estimatedCellNumber = round(ercc$estimatedCellNumber)
) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(ercc$estimatedCellNumber))
  ) +
  scale_y_continuous(
    name = "Detected cell number",
    breaks = 0:max(round(nConnections))
  ) +
  theme_bw()

Fig 9: Detected cell number vs. Total counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.counts = colSums(getData(cObjMul, "counts"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total counts") +
  theme_bw()

Fig 10: Detected cell number vs. Total ERCC counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.ercc = colSums(getData(cObjMul, "counts.ercc"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total ERCC counts") +
  theme_bw()

Fig 11: Connections

#pdf('figures/figure4.pdf', width = 9.5, height = 9.5)
plotSwarmCircos(sObj, cObjSng, cObjMul, classOrder = c(
    "Paneth", "Stem", "LGR5+.Mki67low", "LGR5+.Mki67high", "LGR5-.Mki67low",
    "TA.early", "TA.late", "Enterocyte", "Goblet", 
    "Tufft", "Chromaffin", "Blood"
))

#dev.off()

Fig 12: Filtered

Only detected duplicates, triplicates, and quadruplicates.
ERCC estimated cell number set to max 4.
Weight cutoff = 5.

adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep <- rs == 2 | rs == 3 | rs == 4

#pdf('figures/figure5.pdf', width = 9.5, height = 9.5)
plotSwarmCircos(
  filterSwarm(sObj, keep), cObjSng, cObjMul, weightCut = 5, 
  classOrder = c(
    "Paneth", "Stem", "LGR5+.Mki67low", "LGR5+.Mki67high", "LGR5-.Mki67low",
    "TA.early", "TA.late", "Enterocyte", "Goblet", 
    "Tufft", "Chromaffin", "Blood"
    ), theoretical.max = 4
)

#dev.off()